@inproceedings{williams2001using,
  author    = {Williams, C. K. I. and Seeger, M. W.},
  title     = {Using the {Nystr{\"o}m} method to speed up kernel machines},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {682--688},
  year      = {2001},
}
@inproceedings{rudi2015less,
  author    = {Rudi, A. and Camoriano, R. and Rosasco, L.},
  title     = {Less is more: {Nystr{\"o}m} computational regularization},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1657--1665},
  year      = {2015},
}
@book{barber2012bayesian,
  author    = {Barber, D.},
  title     = {{Bayesian} Reasoning and Machine Learning},
  publisher = {Cambridge University Press},
  year      = {2012},
}
@book{cristianini2000introduction,
  author    = {Cristianini, N. and Shawe-Taylor, J.},
  title     = {An Introduction to Support Vector Machines and Other Kernel-Based Learning Methods},
  publisher = {Cambridge University Press},
  year      = {2000},
}
@article{fan2008liblinear,
  author  = {Fan, R.-E. and Chang, K.-W. and Hsieh, C.-J. and Wang, X.-R. and Lin, C.-J.},
  title   = {{LIBLINEAR}: A Library for Large Linear Classification},
  journal = {Journal of Machine Learning Research},
  volume  = {9},
  pages   = {1871--1874},
  year    = {2008},
}
@book{Rasmussen2005GPM,
  author    = {Rasmussen, C. E. and Williams, C. K. I.},
  title     = {{Gaussian} Processes for Machine Learning},
  publisher = {The MIT Press},
  year      = {2006},
}

@article{gretton2012kernel,
  author  = {Gretton, A. and Borgwardt, K. M. and Rasch, M. J. and Sch{\"o}lkopf, B. and Smola, A.},
  title   = {A kernel two-sample test},
  journal = {Journal of Machine Learning Research},
  volume  = {13},
  number  = {1},
  pages   = {723--773},
  year    = {2012},
}
@article{ueda2000smem,
  author    = {Ueda, N. and Nakano, R. and Ghahramani, Z. and Hinton, G. E.},
  title     = {{SMEM} Algorithm for Mixture Models},
  journal   = {Neural Computation},
  volume    = {12},
  number    = {9},
  pages     = {2109--2128},
  year      = {2000},
  publisher = {MIT Press},
}
@article{chang2011libsvm,
  author  = {Chang, C.-C. and Lin, C.-J.},
  title   = {{LIBSVM}: A library for support vector machines},
  journal = {ACM Transactions on Intelligent Systems and Technology},
  volume  = {2},
  number  = {3},
  pages   = {27:1--27:27},
  year    = {2011},
}
@article{chang2002training,
  author  = {Chang, C.-C. and Lin, C.-J.},
  title   = {Training {$\nu$}-support vector regression: Theory and algorithms},
  journal = {Neural Computation},
  volume  = {14},
  number  = {8},
  pages   = {1959--1977},
  year    = {2002},
}
@book{scholkopf2002learning,
  author    = {Sch{\"o}lkopf, B. and Smola, A. J.},
  title     = {Learning with Kernels: Support Vector Machines, Regularization, Optimization, and Beyond},
  publisher = {MIT Press},
  year      = {2002},
}
@book{Goodfellow-et-al-2016-Book,
  author    = {Goodfellow, I. and Bengio, Y. and Courville, A.},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  url       = {http://www.deeplearningbook.org},
}
@article{escalera2010error,
  author  = {Escalera, S. and Pujol, O. and Radeva, P.},
  title   = {Error-correcting output codes library},
  journal = {Journal of Machine Learning Research},
  volume  = {11},
  pages   = {661--664},
  year    = {2010},
}

@inproceedings{gao2011discriminative,
  author    = {Gao, T. and Koller, D.},
  title     = {Discriminative learning of relaxed hierarchy for large-scale visual recognition},
  booktitle = {IEEE International Conference on Computer Vision},
  year      = {2011},
  pages     = {2072--2079}
}

@inproceedings{shalev2011shareboost,
  author    = {Shalev-Shwartz, S. and Wexler, Y. and Shashua, A.},
  title     = {{ShareBoost}: Efficient multiclass learning with feature sharing},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1179--1187},
  year      = {2011},
}

@inproceedings{gretton2012optimal,
  author    = {Gretton, A. and Sriperumbudur, B. and Sejdinovic, D. and Strathmann, H. and Balakrishnan, S. and Pontil, M. and Fukumizu, K.},
  title     = {Optimal kernel choice for large-scale two-sample tests},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2012},
}
@article{sonnenburg2006large,
  author  = {Sonnenburg, S. and R{\"a}tsch, G. and Sch{\"a}fer, C. and Sch{\"o}lkopf, B.},
  title   = {Large scale multiple kernel learning},
  journal = {Journal of Machine Learning Research},
  year    = {2006},
  volume  = {7},
  pages   = {1531--1565}
}
@article{Breiman2001,
  author  = {Breiman, Leo},
  title   = {Random Forests},
  journal = {Machine Learning},
  volume  = {45},
  pages   = {5--32},
  year    = {2001},
}
@article{weinberger2009distance,
  author  = {Weinberger, K. Q. and Saul, L. K.},
  title   = {Distance metric learning for large margin nearest neighbor classification},
  journal = {Journal of Machine Learning Research},
  volume  = {10},
  pages   = {207--244},
  month   = feb,
  year    = {2009},
}
@article{Quinonero-Candela2005,
  author  = {Qui{\~n}onero-Candela, J. and Rasmussen, C. E.},
  title   = {A Unifying View of Sparse Approximate {Gaussian} Process Regression},
  journal = {Journal of Machine Learning Research},
  volume  = {6},
  pages   = {1939--1959},
  year    = {2005},
}

@article{hyvarinen2000independent,
  author  = {Hyv{\"a}rinen, A. and Oja, E.},
  title   = {Independent component analysis: algorithms and applications},
  journal = {Neural Networks},
  volume  = {13},
  number  = {4--5},
  pages   = {411--430},
  year    = {2000},
}
